rikz <- read.table( 'ecol 562/RIKZ.txt', header=TRUE) apply(rikz[,2:76], 1, function(x) sum(x>0)) -> rikz$richness lm(richness~NAP, data=rikz) -> mod1 lm(richness~NAP+factor(week), data=rikz) -> mod2 lm(richness~NAP*factor(week), data=rikz) -> mod3 #log-likelihood for intercept-only model Pois.LL <- function(lambda) sum(log(dpois(rikz$richness,lambda))) #choose a starting estimate Pois.LL(2) Pois.LL(3) Pois.LL(4) Pois.LL(5) Pois.LL(6) Pois.LL(7) # unexpected behavior: returns one value, not two Pois.LL(c(5,7)) # compare with ordinary function f <- function(x) x^2 f(c(5,7)) #need to use sapply sapply(c(5,7), Pois.LL) sapply(seq(4,8,.01),Pois.LL)->out.L # plot log-likelihood plot(seq(4,8,.01),out.L,type='l',xlab=expression(lambda), ylab='log-likelihood') # obtain maximum numerically optimize(Pois.LL,c(4,7),maximum=T) # optim and nlminb locate minima, not maxima nlminb(5,function(x) -Pois.LL(x)) optim(5,function(x) -Pois.LL(x)) # Poisson regression with a single predictor NAP # identity link function Pois.LL2 <- function(x) { lambda <- x[1] + x[2]*rikz$NAP sum(log(dpois(rikz$richness,lambda))) } # some starting values yield illegal calculations Pois.LL2(1,-2) Pois.LL2(c(1,-2)) Pois.LL2(c(7,-2)) Pois.LL2(c(7,-3)) Pois.LL2(c(6,-3)) optim(c(7,-3),function(x) -Pois.LL2(x)) # numerically more stable to use log link Pois.LL2 <- function(x) { loglambda <- x[1] + x[2]*rikz$NAP sum(log(dpois(rikz$richness,exp(loglambda)))) } # results are now stable Pois.LL2(c(2,.5)) Pois.LL2(c(2,.25)) Pois.LL2(c(3,.25)) optim(c(2,.25),function(x) -Pois.LL2(x)) glm(richness~NAP,data=rikz,family=poisson)->mod1p summary(mod1p) anova(mod1p) anova(mod1p,test='Chisq') # log link is the default mod1p<-glm(richness~NAP, data=rikz, family=poisson) # identity link requires starting values mod1p1<-glm(richness~NAP, data=rikz, family=poisson(link=identity)) mod1p1<-glm(richness~NAP, data=rikz, family=poisson(link=identity), start=c(6,-3)) mod1p1<-glm(richness~NAP, data=rikz, family=poisson(link=identity), start=c(2,-.1)) coef(mod1p1) mod1p1<-glm(richness~NAP, data=rikz, family=poisson(link=identity), start=c(6.7,-2.9)) # can compare log-liklihoods of the two models logLik(mod1p) logLik(mod1p1) #to obtain log-likelihood of two models use sapply logLik(mod1p1,mod1p) sapply(list(mod1p1,mod1p),logLik) # predict returns predictions on scale of link function predict(mod1p) # fitted returns predictions on scale of original variable fitted(mod1p) # 3-dimensional picture of model mu=exp(predict(mod1p,newdata=data.frame(NAP=c(-1.5,0,1,2)))) y1 <- seq(0,20,1) x1a<-rep(-1.5,length(y1)) x1b<-rep(0,length(y1)) x1c<-rep(1,length(y1)) x1d<-rep(2,length(y1)) z1a <- dpois(y1,mu[1]) z1b <- dpois(y1,mu[2]) z1c <- dpois(y1,mu[3]) z1d <- dpois(y1,mu[4]) x<-c(x1a,x1b,x1c,x1d) y<-c(y1,y1,y1,y1) z<-c(z1a,z1b,z1c,z1d) q1 <- scatterplot3d(x,y,z, xlim=c(-1.5, 2), ylim=c(0, 20), type="n", xlab="NAP", zlab="Density", box=FALSE, ylab='Richness') q1$points3d(x1a, y1, z1a, type='h',cex=.8) q1$points3d(x1b, y1, z1b, type='h',cex=.8) q1$points3d(x1c, y1, z1c, type='h',cex=.8) q1$points3d(x1d, y1, z1d, type='h',cex=.8) mu2<-exp(predict(mod1p, newdata=data.frame(NAP=seq(-1.5,2,.1)))) q1$points3d(seq(-1.5,2,.1), mu2, rep(0,length(mu2)), type='l', lwd=2) # redo graph for identity link model windows() # quartz() on Mac mu=predict(mod1p1,newdata=data.frame(NAP=c(-1.5,0,1,2))) y1 <- seq(0,20,1) x1a<-rep(-1.5,length(y1)) x1b<-rep(0,length(y1)) x1c<-rep(1,length(y1)) x1d<-rep(2,length(y1)) z1a <- dpois(y1,mu[1]) z1b <- dpois(y1,mu[2]) z1c <- dpois(y1,mu[3]) z1d <- dpois(y1,mu[4]) x<-c(x1a,x1b,x1c,x1d) y<-c(y1,y1,y1,y1) z<-c(z1a,z1b,z1c,z1d) q1 <- scatterplot3d(x,y,z, xlim=c(-1.5, 2), ylim=c(0, 20), type="n", xlab="NAP", zlab="Density", box=FALSE, ylab='Richness') q1$points3d(x1a, y1, z1a, type='h',cex=.8) q1$points3d(x1b, y1, z1b, type='h',cex=.8) q1$points3d(x1c, y1, z1c, type='h',cex=.8) q1$points3d(x1d, y1, z1d, type='h',cex=.8) mu2<-predict(mod1p1, newdata=data.frame(NAP=seq(-1.5,2,.1))) #add regression line q1$points3d(seq(-1.5,2,.1), mu2, rep(0,length(mu2)), type='l', lwd=2) # add week to the regression model mod2p<-glm(richness~NAP+factor(week),data=rikz,family=poisson) mod3p<-glm(richness~NAP*factor(week),data=rikz,family=poisson) # test models anova(mod3p,test='Chisq') # examine estimates summary(mod3p) # compare log-likelihood of normal and Poisson model logLik(mod3) logLik(mod3p) # to get a probability from a density we need to integrate # but the density is the midpoint approximation of the integral # over a unit interval predict(mod3)[1] rikz$richness[1] fitted(mod3p)[1] # midpoint approximation dnorm(11,predict(mod3)[1],summary(mod3)$sigma) # actual probability pnorm(11.5,predict(mod3)[1],summary(mod3)$sigma)- pnorm(10.5,predict(mod3)[1],summary(mod3)$sigma) # for models with different numbers of parameters use AIC rather than LL AIC(mod3,mod3p) # check fit of model: use model to obtain Poisson means # display Poisson distributions for each site # superimpose observed value on this distribution sapply(0:30, function(x) dpois(x,exp(predict(mod3p)))) -> out dim(out) out[1:4,1:8] # create data frame to plot my.dat <- data.frame(p=as.vector(t(out)),x=rep(0:30,45), rich=rep(rikz$richness,rep(31,45)), id=rep(1:45,rep(31,45))) my.dat[1:8,] library(lattice) xyplot(p~x|factor(id),dat=my.dat,panel=function(x,y,subscripts){ panel.xyplot(x,y,type='h',lwd=2,col='grey70') panel.points(my.dat$rich[subscripts][1],0,col=2,pch=8,cex=.8)}) # could get better picture by removing negligible probabilities my.dat2 <- my.dat[my.dat$p>.0001,] xyplot(p~x|factor(id),dat=my.dat2,panel=function(x,y,subscripts){ panel.xyplot(x,y,type='h',lwd=2,col='grey70') panel.points(my.dat2$rich[subscripts][1],0,col=2,pch=8,cex=.8)}, scales=list(x='free'))